#' Train a multi-input multi-output (MIMO) RNN with torch
#'
#' Orders `data` chronologically by `date_col`, takes the last
#' `val_split` fraction of rows as a validation set, robust-scales inputs
#' and outputs (median / IQR computed on the training split only), and
#' fits an `nn_rnn` followed by a linear head using full-batch gradient
#' descent, where each row is treated as a length-1 sequence.
#'
#' @param data Data frame containing `date_col`, `input_cols`, `output_cols`.
#' @param date_col Unquoted column name used as the time index.
#' @param input_cols Character vector of predictor column names.
#' @param output_cols Character vector of target column names.
#' @param val_split Fraction of rows (taken from the end) used for
#'   validation; must yield at least one training and one validation row.
#' @param epochs Number of full-batch training epochs.
#' @param batch_size Currently unused — training is full-batch; kept for
#'   interface compatibility.
#' @param lr Learning rate.
#' @param optimizer One of `"adam"` or `"sgd"`.
#' @param hidden_size Hidden state size of the RNN.
#' @param num_layers Number of stacked RNN layers.
#' @param activation Activation applied to the last hidden state before the
#'   linear head: `"tanh"`, `"relu"`, or `"linear"`.
#' @param dropout Dropout between stacked RNN layers; only effective when
#'   `num_layers > 1` (torch warns otherwise).
#' @param weight_decay L2 penalty passed to the optimizer.
#' @return A list with `model`, per-epoch `train_loss` / `val_loss`
#'   vectors, the robust `scaler`, `input_cols`, `output_cols`, and
#'   `date_col` (as a string).
train_rnn_mimo <- function(data,
                           date_col,
                           input_cols,
                           output_cols,
                           val_split = 0.2,
                           epochs = 50,
                           batch_size = 32,
                           lr = 1e-3,
                           optimizer = c("adam", "sgd"),
                           hidden_size = 50,
                           num_layers = 1,
                           activation = c("tanh", "relu", "linear"),
                           dropout = 0.0,
                           weight_decay = 0.0) {
  optimizer <- match.arg(optimizer)
  activation <- match.arg(activation)
  date_col <- rlang::ensym(date_col)

  # 1) Order by time index so the split below is chronological
  data <- data %>% arrange(!!date_col)

  # 2) Chronological split: the last n_val rows become the validation set.
  n <- nrow(data)
  n_val <- floor(val_split * n)
  # Guard against degenerate splits: with n_val == 0 the expression
  # `(n - n_val + 1):n` would be the descending sequence c(n + 1, n) and
  # silently select a nonexistent (all-NA) row.
  if (n_val < 1 || n_val >= n) {
    stop("`val_split` must leave at least one training and one validation row ",
         "(n = ", n, ", n_val = ", n_val, ").", call. = FALSE)
  }
  train_df <- data[seq_len(n - n_val), ]
  val_df <- data[(n - n_val + 1):n, ]

  # 3) Robust scaler (median / IQR) fitted on the training split only,
  #    so no validation statistics leak into training.
  col_stat <- function(cols, fun) {
    vapply(cols, function(col) fun(train_df[[col]], na.rm = TRUE), numeric(1))
  }
  # A zero IQR (constant column) would divide by zero and produce NaN/Inf
  # features; fall back to 1 so the column is centred but left unscaled.
  sanitize_iqr <- function(iqr) {
    zero <- iqr == 0
    if (any(zero)) {
      warning("Zero IQR for column(s): ",
              paste(names(iqr)[zero], collapse = ", "),
              "; using 1 instead.", call. = FALSE)
      iqr[zero] <- 1
    }
    iqr
  }
  scaler <- list(
    input_median = col_stat(input_cols, median),
    input_iqr = sanitize_iqr(col_stat(input_cols, IQR)),
    output_median = col_stat(output_cols, median),
    output_iqr = sanitize_iqr(col_stat(output_cols, IQR))
  )

  # 4) Apply the training-split scaler to both splits
  for (col in input_cols) {
    train_df[[col]] <- (train_df[[col]] - scaler$input_median[col]) / scaler$input_iqr[col]
    val_df[[col]] <- (val_df[[col]] - scaler$input_median[col]) / scaler$input_iqr[col]
  }
  for (col in output_cols) {
    train_df[[col]] <- (train_df[[col]] - scaler$output_median[col]) / scaler$output_iqr[col]
    val_df[[col]] <- (val_df[[col]] - scaler$output_median[col]) / scaler$output_iqr[col]
  }

  # 5) RNN followed by an activation and a linear head. `nonlinearity` is
  #    the RNN's internal cell nonlinearity; `activation` is applied to the
  #    last hidden state before the output layer.
  RNNModel <- nn_module(
    "RNNModel",
    initialize = function(input_size,
                          hidden_size,
                          num_layers,
                          dropout,
                          output_size,
                          activation,
                          nonlinearity = c("tanh", "relu")) {
      nonlinearity <- match.arg(nonlinearity)
      self$rnn <- nn_rnn(
        input_size = input_size,
        hidden_size = hidden_size,
        num_layers = num_layers,
        nonlinearity = nonlinearity,
        batch_first = TRUE,
        dropout = dropout
      )
      self$fc <- nn_linear(hidden_size, output_size)
      self$act <- switch(
        activation,
        tanh = nn_tanh(),
        relu = nn_relu(),
        linear = nn_identity()
      )
    },
    forward = function(x) {
      # x: [batch, seq_len, input_size]
      out <- self$rnn(x)
      # out[[1]] holds the per-time-step outputs: [batch, seq_len, hidden].
      # Keep only the last time step (indexing drops the seq dimension).
      last <- out[[1]][, dim(out[[1]])[2], ]
      h_act <- self$act(last)
      self$fc(h_act)
    }
  )

  # 6) Build full-batch tensors; each row becomes a length-1 sequence, so
  #    X has shape [n, 1, n_inputs] and Y has shape [n, n_outputs].
  make_ds <- function(df) {
    x_mat <- as.matrix(df[, input_cols])
    y_mat <- as.matrix(df[, output_cols])
    X <- torch_tensor(x_mat, dtype = torch_float())$view(c(nrow(x_mat), -1, length(input_cols)))
    Y <- torch_tensor(y_mat, dtype = torch_float())
    list(x = X, y = Y)
  }
  train_ds <- make_ds(train_df)
  val_ds <- make_ds(val_df)

  # 7) Instantiate model and optimizer
  model <- RNNModel(
    input_size = length(input_cols),
    hidden_size = hidden_size,
    num_layers = num_layers,
    dropout = dropout,
    output_size = length(output_cols),
    activation = activation,
    nonlinearity = "relu"
  )
  optim <- switch(
    optimizer,
    adam = optim_adam(model$parameters, lr = lr, weight_decay = weight_decay),
    sgd = optim_sgd(model$parameters, lr = lr, weight_decay = weight_decay)
  )
  criterion <- nn_mse_loss()

  # 8) Full-batch training loop (NOTE: `batch_size` is not used here)
  train_loss <- numeric(epochs)
  val_loss <- numeric(epochs)
  for (e in seq_len(epochs)) {
    model$train()
    optim$zero_grad()
    preds_train <- model(train_ds$x)
    loss_train <- criterion(preds_train, train_ds$y)
    loss_train$backward()
    optim$step()
    train_loss[e] <- loss_train$item()

    # Validation pass with gradients disabled
    model$eval()
    with_no_grad({
      preds_val <- model(val_ds$x)
      val_loss[e] <- criterion(preds_val, val_ds$y)$item()
    })
  }

  list(
    model = model,
    train_loss = train_loss,
    val_loss = val_loss,
    scaler = scaler,
    input_cols = input_cols,
    output_cols = output_cols,
    date_col = rlang::as_string(date_col)
  )
}